In [1]:
import mxnet as mx
import numpy as np
from mxnet import gluon
from tqdm import tqdm_notebook as tqdm
mx.random.seed(1)  # fix the seed for reproducibility
In [2]:
ctx = mx.cpu()
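If a GPU is available, the rest of the notebook runs unchanged by swapping the context. A minimal sketch, assuming a reasonably recent MXNet that exposes mx.context.num_gpus():

# Fall back to CPU when no GPU is detected
ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()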
In [3]:
batch_size = 64
In [4]:
def transformer(data, label):
    # Upsample the 32x32 CIFAR-10 image to the 224x224 input AlexNet expects
    data = mx.image.imresize(data, 224, 224)
    # Reorder from HWC to the CHW layout used by Gluon's Conv2D
    data = mx.nd.transpose(data, (2, 0, 1))
    data = data.astype(np.float32)
    return data, label
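To sanity-check the transform, we can push a dummy HWC uint8 image through it; a minimal sketch, with the random image made up for illustration:

raw = mx.nd.random.uniform(0, 255, shape=(32, 32, 3)).astype('uint8')  # fake CIFAR-10 image
img, lbl = transformer(raw, 0)
print(img.shape, img.dtype)  # expect (3, 224, 224) and float32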
In [5]:
train_data = gluon.data.DataLoader(dataset=gluon.data.vision.CIFAR10('./data', train=True, transform=transformer),
                                   batch_size=batch_size,
                                   shuffle=True,
                                   last_batch='discard')
test_data = gluon.data.DataLoader(dataset=gluon.data.vision.CIFAR10('./data', train=False, transform=transformer),
                                  batch_size=batch_size,
                                  shuffle=False,
                                  last_batch='discard')
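Because last_batch='discard' drops the final partial batch, the loader's length is the number of full batches. A quick check:

# CIFAR-10 has 50,000 training images; 50000 // 64 = 781 full batches
print(len(train_data))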
In [6]:
# Grab a single batch to inspect its shape and dtype
for d, l in train_data:
    break
In [7]:
print(d.shape, l.shape)  # (64, 3, 224, 224) (64,)
In [8]:
d.dtype
Out[8]:
numpy.float32
In [9]:
alex_net = gluon.nn.Sequential()
with alex_net.name_scope():
    # First convolutional layer
    alex_net.add(gluon.nn.Conv2D(channels=96,
                                 kernel_size=11,
                                 strides=(4, 4),
                                 activation='relu'))
    alex_net.add(gluon.nn.MaxPool2D(pool_size=3,
                                    strides=2))
    # Second convolutional layer
    alex_net.add(gluon.nn.Conv2D(channels=192,
                                 kernel_size=5,
                                 activation='relu'))
    alex_net.add(gluon.nn.MaxPool2D(pool_size=3,
                                    strides=(2, 2)))
    # Third convolutional layer
    alex_net.add(gluon.nn.Conv2D(channels=384,
                                 kernel_size=3,
                                 activation='relu'))
    # Fourth convolutional layer
    alex_net.add(gluon.nn.Conv2D(channels=384,
                                 kernel_size=3,
                                 activation='relu'))
    # Fifth convolutional layer
    alex_net.add(gluon.nn.Conv2D(channels=256,
                                 kernel_size=3,
                                 activation='relu'))
    alex_net.add(gluon.nn.MaxPool2D(pool_size=3,
                                    strides=2))
    # Flatten and apply fully connected layers
    alex_net.add(gluon.nn.Flatten())
    alex_net.add(gluon.nn.Dense(units=4096,
                                activation="relu"))
    alex_net.add(gluon.nn.Dense(units=4096,
                                activation="relu"))
    alex_net.add(gluon.nn.Dense(units=10))
In [10]:
alex_net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
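Gluon infers layer shapes lazily on the first forward pass, so passing a dummy batch through the initialized network confirms everything wires up and emits one logit per CIFAR-10 class. A minimal sketch:

dummy = mx.nd.random.uniform(shape=(1, 3, 224, 224), ctx=ctx)
print(alex_net(dummy).shape)  # expect (1, 10)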
In [11]:
trainer = gluon.Trainer(params=alex_net.collect_params(),
optimizer='sgd',
optimizer_params={'learning_rate': .001})
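The learning rate is not fixed for the life of the Trainer; it can be adjusted between epochs, e.g. for a simple step decay. A sketch, not used below:

trainer.set_learning_rate(trainer.learning_rate * 0.1)  # decay by 10x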
In [12]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
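SoftmaxCrossEntropyLoss takes raw logits plus integer class labels, so no softmax layer is needed in the network itself. A toy call (values made up) shows the loss is near zero for a confident, correct prediction:

logits = mx.nd.array([[10., 0, 0, 0, 0, 0, 0, 0, 0, 0]])  # strongly favors class 0
print(softmax_cross_entropy(logits, mx.nd.array([0])))    # ~0.0004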
In [13]:
def evaluate_accuracy(data_iterator, net):
    acc = mx.metric.Accuracy()
    for d, l in data_iterator:
        data = d.as_in_context(ctx)
        label = l.as_in_context(ctx)
        output = net(data)
        # The predicted class is the index of the largest logit
        predictions = mx.nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]
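Before any training, the randomly initialized network should score near chance. A quick (if slow on CPU) check:

print(evaluate_accuracy(test_data, alex_net))  # roughly 0.1 for 10 balanced classes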
In [14]:
epochs = 1
smoothing_constant = .01

for e in range(epochs):
    for i, (d, l) in tqdm(enumerate(train_data)):
        data = d.as_in_context(ctx)
        label = l.as_in_context(ctx)
        with mx.autograd.record():
            output = alex_net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # Normalize the gradient by the batch size
        trainer.step(data.shape[0])

        ##########################
        #  Keep a moving average of the losses
        ##########################
        curr_loss = mx.nd.mean(loss).asscalar()
        moving_loss = (curr_loss if ((i == 0) and (e == 0))
                       else (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss)

    test_accuracy = evaluate_accuracy(test_data, alex_net)
    train_accuracy = evaluate_accuracy(train_data, alex_net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" % (e, moving_loss, train_accuracy, test_accuracy))